Downloading US Census Data from API

This notebook shows how to donwload all blockgroup data for a set of variable You can change to download different geographies*

It uses three external libraries:

Sunlight Labs Census Wrapper - Joe's Fork Note: blockgroups will not work with pip install census but may with other geographies.
US States Library
Pandas



In [1]:

    
# may need to point this to your census module
from us_census_api.core import Census 
# Download from pypi
from us import states

from pandas import DataFrame
import pandas as pd



In [2]:

    
census_API_key = 'API_KEY_HERE'

c = Census(census_API_key, year=2010)



In [4]:

    
# Test basic API call
data = DataFrame(c.acs5.state_county_blockgroup(('NAME', 'GEOID'), 36, '5', Census.ALL))
data.head()









    Out[4]:






  
    
      
      GEOID
      NAME
      block group
      county
      state
      tract
    
  
  
    
      0
      15000US360050001000
      Block Group 0, Census Tract 1, Bronx County, N...
      0
      005
      36
      000100
    
    
      1
      15000US360050001001
      Block Group 1, Census Tract 1, Bronx County, N...
      1
      005
      36
      000100
    
    
      2
      15000US360050002000
      Block Group 0, Census Tract 2, Bronx County, N...
      0
      005
      36
      000200
    
    
      3
      15000US360050002001
      Block Group 1, Census Tract 2, Bronx County, N...
      1
      005
      36
      000200
    
    
      4
      15000US360050002002
      Block Group 2, Census Tract 2, Bronx County, N...
      2
      005
      36
      000200



In [12]:

    
# variable list:

var_edu =  {
    'GEOID':'GeoId',
    'B15002_001E': 'Edu Total',
    'B15002_002E': 'Edu Male Total',
}

for i in range(3,19):
    key = 'B15002_{}E'.format(str(i).zfill(3))
    var_edu[key] = 'Edu Male Level {}'.format(i-3)

for i in range(20,36):
    key = 'B15002_{}E'.format(str(i).zfill(3))
    var_edu[key] = 'Edu Female Level {}'.format(i-20)

var_edu_tup = tuple(key for key, val in var_edu.items())



In [13]:

    
var_edu









    Out[13]:





{'B15002_001E': 'Edu Total',
 'B15002_002E': 'Edu Male Total',
 'B15002_003E': 'Edu Male Level 0',
 'B15002_004E': 'Edu Male Level 1',
 'B15002_005E': 'Edu Male Level 2',
 'B15002_006E': 'Edu Male Level 3',
 'B15002_007E': 'Edu Male Level 4',
 'B15002_008E': 'Edu Male Level 5',
 'B15002_009E': 'Edu Male Level 6',
 'B15002_010E': 'Edu Male Level 7',
 'B15002_011E': 'Edu Male Level 8',
 'B15002_012E': 'Edu Male Level 9',
 'B15002_013E': 'Edu Male Level 10',
 'B15002_014E': 'Edu Male Level 11',
 'B15002_015E': 'Edu Male Level 12',
 'B15002_016E': 'Edu Male Level 13',
 'B15002_017E': 'Edu Male Level 14',
 'B15002_018E': 'Edu Male Level 15',
 'B15002_020E': 'Edu Female Level 0',
 'B15002_021E': 'Edu Female Level 1',
 'B15002_022E': 'Edu Female Level 2',
 'B15002_023E': 'Edu Female Level 3',
 'B15002_024E': 'Edu Female Level 4',
 'B15002_025E': 'Edu Female Level 5',
 'B15002_026E': 'Edu Female Level 6',
 'B15002_027E': 'Edu Female Level 7',
 'B15002_028E': 'Edu Female Level 8',
 'B15002_029E': 'Edu Female Level 9',
 'B15002_030E': 'Edu Female Level 10',
 'B15002_031E': 'Edu Female Level 11',
 'B15002_032E': 'Edu Female Level 12',
 'B15002_033E': 'Edu Female Level 13',
 'B15002_034E': 'Edu Female Level 14',
 'B15002_035E': 'Edu Female Level 15',
 'GEOID': 'GeoId'}



In [14]:

    
# Test API call with variables
data = DataFrame(c.acs5.state_county_blockgroup(var_edu_tup, 36, '5', Census.ALL))
data.head()









    Out[14]:






  
    
      
      B15002_001E
      B15002_002E
      B15002_003E
      B15002_004E
      B15002_005E
      B15002_006E
      B15002_007E
      B15002_008E
      B15002_009E
      B15002_010E
      ...
      B15002_031E
      B15002_032E
      B15002_033E
      B15002_034E
      B15002_035E
      GEOID
      block group
      county
      state
      tract
    
  
  
    
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      15000US360050001000
      0
      005
      36
      000100
    
    
      1
      7943
      6931
      102
      70
      260
      278
      438
      1025
      1366
      1139
      ...
      19
      45
      6
      0
      8
      15000US360050001001
      1
      005
      36
      000100
    
    
      2
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      15000US360050002000
      0
      005
      36
      000200
    
    
      3
      454
      194
      0
      0
      0
      74
      0
      0
      0
      0
      ...
      40
      7
      0
      0
      0
      15000US360050002001
      1
      005
      36
      000200
    
    
      4
      1152
      563
      0
      0
      0
      0
      0
      14
      0
      12
      ...
      29
      95
      45
      0
      0
      15000US360050002002
      2
      005
      36
      000200
    
  

5 rows × 39 columns



In [18]:

    
bkgps = {}



In [19]:

    
# THIS DOWNLOADS ALL THE DATA! 
# It can take awhile. 

# If it breaks:
# 1. check what state was the last state you got. assume you only got part of that state
# 2. delete that state in bkgps dict
# 3. change states.STATES to states.STATES[5:] (or wherever it broke)
# 4. repeat

for state in states.STATES:
    print(state)
    
    counties = c.acs5.state_county('NAME', state.fips, Census.ALL)
    data = []
    for county in counties:
        county_num = county['county']
        data.extend(c.acs5.state_county_blockgroup(var_edu_tup, state.fips, county_num, Census.ALL))
    bkgps[state] = data









    



Alabama
Alaska
Arizona
Arkansas
California
Colorado
Connecticut
Delaware
District of Columbia
Florida
Georgia
Hawaii
Idaho
Illinois
Indiana
Iowa
Kansas
Kentucky
Louisiana
Maine
Maryland
Massachusetts
Michigan
Minnesota
Mississippi
Missouri
Montana
Nebraska
Nevada
New Hampshire
New Jersey
New Mexico
New York
North Carolina
North Dakota
Ohio
Oklahoma
Oregon
Pennsylvania
Rhode Island
South Carolina
South Dakota
Tennessee
Texas
Utah
Vermont
Virginia
Washington
West Virginia
Wisconsin
Wyoming






    Out[19]:






  
    
      
      B15002_001E
      B15002_002E
      B15002_003E
      B15002_004E
      B15002_005E
      B15002_006E
      B15002_007E
      B15002_008E
      B15002_009E
      B15002_010E
      ...
      B15002_031E
      B15002_032E
      B15002_033E
      B15002_034E
      B15002_035E
      GEOID
      block group
      county
      state
      tract
    
  
  
    
      0
      920
      388
      7
      0
      1
      24
      11
      5
      24
      30
      ...
      28
      54
      10
      0
      0
      15000US480019501001
      1
      001
      48
      950100
    
    
      1
      1560
      907
      0
      11
      43
      11
      25
      61
      97
      15
      ...
      37
      77
      6
      0
      3
      15000US480019501002
      2
      001
      48
      950100
    
    
      2
      1251
      745
      0
      31
      19
      30
      0
      3
      17
      0
      ...
      14
      25
      8
      0
      0
      15000US480019501003
      3
      001
      48
      950100
    
    
      3
      6876
      6843
      28
      66
      125
      571
      477
      634
      794
      64
      ...
      8
      0
      0
      0
      0
      15000US480019504011
      1
      001
      48
      950401
    
    
      4
      3574
      3507
      28
      0
      23
      271
      223
      249
      351
      82
      ...
      16
      0
      0
      0
      0
      15000US480019504021
      1
      001
      48
      950402
    
  

5 rows × 39 columns



In [ ]:

    
bkgpsDF = []
for key, val in bkgps.items():
    bkgpsDF.extend(val)
bkgpDF = DataFrame(bkgpsDF)
bkgpDF.head()



In [20]:

    
bkgpDF.to_csv('/Users/joe/Dropbox/SFI_CensusData/UnitedStates/2010acs_edu.csv', index=False)



In [ ]:

	GEOID	NAME	block group	county	state	tract
0	15000US360050001000	Block Group 0, Census Tract 1, Bronx County, N...	0	005	36	000100
1	15000US360050001001	Block Group 1, Census Tract 1, Bronx County, N...	1	005	36	000100
2	15000US360050002000	Block Group 0, Census Tract 2, Bronx County, N...	0	005	36	000200
3	15000US360050002001	Block Group 1, Census Tract 2, Bronx County, N...	1	005	36	000200
4	15000US360050002002	Block Group 2, Census Tract 2, Bronx County, N...	2	005	36	000200

	B15002_001E	B15002_002E	B15002_003E	B15002_004E	B15002_005E	B15002_006E	B15002_007E	B15002_008E	B15002_009E	B15002_010E	...	B15002_031E	B15002_032E	B15002_033E	B15002_035E	GEOID	block group	county	state	tract
0	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	15000US360050001000	0	005	36	000100
1	7943	6931	102	70	260	278	438	1025	1366	1139	...	19	45	6	8	15000US360050001001	1	005	36	000100
2	0	0	0	0	0	0	0	0	0	0	...	0	0	0	0	15000US360050002000	0	005	36	000200
3	454	194	0	0	0	74	0	0	0	0	...	40	7	0	0	15000US360050002001	1	005	36	000200
4	1152	563	0	0	0	0	0	14	0	12	...	29	95	45	0	15000US360050002002	2	005	36	000200

	B15002_001E	B15002_002E	B15002_003E	B15002_004E	B15002_005E	B15002_006E	B15002_007E	B15002_008E	B15002_009E	B15002_010E	...	B15002_031E	B15002_032E	B15002_033E	B15002_035E	GEOID	block group	county	state	tract
0	920	388	7	0	1	24	11	5	24	30	...	28	54	10	0	15000US480019501001	1	001	48	950100
1	1560	907	0	11	43	11	25	61	97	15	...	37	77	6	3	15000US480019501002	2	001	48	950100
2	1251	745	0	31	19	30	0	3	17	0	...	14	25	8	0	15000US480019501003	3	001	48	950100
3	6876	6843	28	66	125	571	477	634	794	64	...	8	0	0	0	15000US480019504011	1	001	48	950401
4	3574	3507	28	0	23	271	223	249	351	82	...	16	0	0	0	15000US480019504021	1	001	48	950402